LVK Demo

Open In Colab

LVK Demo#

! pip install logPsplinePSD[gw] -q

Hide code cell source

%load_ext autoreload
%autoreload 2
%matplotlib inline

Hide code cell source

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from log_psplines.datatypes import Periodogram, Timeseries

FMIN, FMAX = 15, 1024


URL = "https://raw.githubusercontent.com/bilby-dev/bilby/main/bilby/gw/detector/noise_curves/aLIGO_O4_high_asd.txt"


def load_lvk_psd() -> Periodogram:
    df = pd.read_csv(URL, comment="#", sep="\s+", header=None)
    freq, asd = df[0].values, df[1].values
    return Periodogram(freq, asd**2)


def create_white_noise(sampling_frequency: float, duration: float):
    number_of_samples = duration * sampling_frequency
    number_of_samples = int(np.round(number_of_samples))

    number_of_samples = int(np.round(duration * sampling_frequency))
    number_of_frequencies = int(np.round(number_of_samples / 2) + 1)

    frequencies = np.linspace(
        start=0, stop=sampling_frequency / 2, num=number_of_frequencies
    )
    norm1 = 0.5 * duration**0.5
    re1, im1 = np.random.normal(0, norm1, (2, len(frequencies)))
    white_noise = re1 + 1j * im1

    # set DC and Nyquist = 0
    white_noise[0] = 0
    # no Nyquist frequency when N=odd
    if np.mod(number_of_samples, 2) == 0:
        white_noise[-1] = 0

    # python: transpose for use with infft
    white_noise = np.transpose(white_noise)
    frequencies = np.transpose(frequencies)
    return white_noise, frequencies


def get_lvk_noise_realisation(sampling_frequency=4096.0, duration=32.0):
    psd = load_lvk_psd()
    white_noise, frequencies = create_white_noise(sampling_frequency, duration)

    with np.errstate(invalid="ignore"):
        colored_noise = (
            np.interp(frequencies, psd.freqs, psd.power) ** 0.5 * white_noise
        )

    n_samples = int(sampling_frequency * duration)
    colored_noise_time = np.fft.irfft(colored_noise, n=n_samples)
    times = np.arange(n_samples) / sampling_frequency
    ts = Timeseries(times, colored_noise_time)

    # Compute periodogram from the time series
    freqs = np.fft.rfftfreq(n_samples, 1 / sampling_frequency)
    fft_data = np.fft.rfft(colored_noise_time)
    pdgrm_power = np.abs(fft_data) ** 2
    pdgrm = Periodogram(freqs[1:-1], pdgrm_power[1:-1])
    return ts, pdgrm


lvk_psd = load_lvk_psd()
lvk_noise, lvk_pdgrm = get_lvk_noise_realisation()


plt.plot(
    lvk_pdgrm.freqs,
    (lvk_pdgrm.power),
    label="LVK noise realisation",
    color="gray",
    alpha=0.5,
    lw=2,
)
plt.loglog(lvk_psd.freqs, lvk_psd.power, "k-", label="aLIGO O4 design", lw=1)
plt.xlim(FMIN, FMAX)
plt.xlabel("Frequency [Hz]")
plt.ylabel("PSD [1/Hz]");
../../_images/560ed2837c52c9542613b9b97af4105accbf05c15544a23f6c9a890df4bd1882.png

Knot allocation#

# knot allocation

import numpy as np
import pandas as pd
from scipy.interpolate import interp1d


def extract_peaks(vec, window=9, threshold_factor=1.0):
    """
    vec: array-like, the periodogram in original scale
    returns: array with log-peaks, otherwise 0
    """
    vec = np.asarray(vec)
    n = len(vec)

    # Rolling median (window size 9, centered)
    med_psd = (
        pd.Series(vec)
        .rolling(window=window, center=True, min_periods=1)
        .median()
        .to_numpy()
    )

    # Power ratio
    ratio_med = vec / (med_psd + np.finfo(float).eps)
    log_ratio_med = np.log(ratio_med)

    # Replace NaNs with mean
    mean_lr = np.nanmean(log_ratio_med)
    log_ratio_med = np.where(np.isnan(log_ratio_med), mean_lr, log_ratio_med)

    # Threshold = Q3 + 1.5 * (Q3 - Q1)
    Q1 = np.nanquantile(log_ratio_med, 0.25)
    Q3 = np.nanquantile(log_ratio_med, 0.75)
    threshold = (Q3 + 1.5 * (Q3 - Q1)) * threshold_factor

    # Find values greater than threshold
    out = np.zeros(n)
    out[log_ratio_med > threshold] = log_ratio_med[log_ratio_med > threshold]

    return out


from scipy.signal.windows import gaussian


def smooth_peaks(vec, d):
    """
    Returns the smoothed vector with peaks.

    Parameters
    ----------
    vec : array-like, Vector with peaks.
    d : int, Propagation distance to the left and right.
    """
    vec = np.asarray(vec)
    out = vec.copy()
    n = len(vec)

    # Gaussian decay (like gsignal::gausswin in R)
    gauss_win = gaussian(2 * d, std=2.5)
    dec = gauss_win[:d][::-1]  # take first d and reverse

    for i in range(1, d + 1):
        # propagate to the left
        aux1 = np.concatenate([vec[i:], np.zeros(i)])
        # propagate to the right
        aux2 = np.concatenate([np.zeros(i), vec[: n - i]])

        # apply Gaussian decay
        aux1 = aux1 * dec[i - 1]
        aux2 = aux2 * dec[i - 1]

        # update with maximum
        out = np.maximum(out, aux1)
        out = np.maximum(out, aux2)

    # Areas between peaks: aassign
    m0 = np.sum(out) * 0.05  # 5 percent of total peaks
    n0 = np.count_nonzero(out)  # number of non-zeros
    if n0 > 0:
        min_value = m0 / n0
        out[out <= min_value] = min_value

    return out


def knotLoc(vec, k, degree, eqSpaced=False):
    """
    Compute knot locations for B-spline densities.

    Parameters
    ----------
    vec : array-like, Numeric vector containing the smoothed peaks.
    k : int,  Number of B-spline densities.
    degree : int
        Degree of the B-spline densities.
    eqSpaced : bool, optional (default=False)
        If True, returns equidistant knots in [0,1].

    Returns
    -------
    knots : ndarray
        Knot positions in [0,1].
    """
    K = k - degree + 1  # number of internal knots in [0,1]
    vec = np.asarray(vec)

    if eqSpaced:
        knots = np.linspace(0, 1, K)
        return knots

    N = len(vec)

    # Normalized density
    dens = vec / np.sum(vec)
    cumf = np.cumsum(dens)

    # Distribution function (maps [0,1] -> [0,1])
    df = interp1d(
        np.linspace(0, 1, N),
        cumf,
        kind="linear",
        bounds_error=False,
        fill_value=(0, 1),
    )

    # Inverse distribution function
    grid = np.linspace(0, 1, N)
    dfvec = df(grid)
    invDf = interp1d(
        dfvec, grid, kind="linear", bounds_error=False, fill_value=np.nan
    )

    # Internal points (exclude 0 and 1)
    v = np.linspace(0, 1, K)
    v = v[1:-1]

    knots = np.concatenate(([0], invDf(v), [1]))

    return knots


def peak_and_knot_loc(pdgrm):
    # 1. Identifying peaks
    aux = extract_peaks(pdgrm.power, window=9, threshold_factor=1.0)

    # 2. Smoothed peaks
    r = smooth_peaks(aux, 40)

    # 3. Compute knots
    k = knotLoc(vec=r, k=100, degree=3)
    maxFreq = np.max(pdgrm.freqs)  # Maximum frequency
    return r, k * maxFreq
lvk_std = np.std(lvk_noise.y)
noise_standardised = (lvk_noise.y - np.mean(lvk_noise.y)) / lvk_std
freqs = np.fft.rfftfreq(lvk_noise.n, 1 / lvk_noise.fs)
pdgrm = Periodogram(
    freqs=freqs, power=np.abs(np.fft.rfft(noise_standardised)) ** 2
)
pdgrm = pdgrm.cut(FMIN, FMAX)
plt.loglog(pdgrm.freqs, pdgrm.power, color="gray", alpha=0.5)
[<matplotlib.lines.Line2D at 0x7fa503910f40>]
../../_images/4dca6382051edc71b43c110341a77c31c49868ce0c064ba05dfa68eff4c1272e.png
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d

# 1. Identifying peaks
aux = extract_peaks(pdgrm.power, window=9, threshold_factor=1.0)

# 2. Smoothed peaks
r = smooth_peaks(aux, 40)

# 3. Plot identified peaks
plt.figure(figsize=(8, 4))
plt.plot(pdgrm.freqs, aux, linestyle="-", color="blue")
plt.title("Identified Peaks")
plt.xlabel("Frequency")
plt.ylabel("log Peaks")
plt.show()

# 4. Plot areas to allocate knots (smoothed peaks)
plt.figure(figsize=(8, 4))
plt.plot(pdgrm.freqs, r, linestyle="-", color="red")
plt.title("Smoothed peaks")
plt.xlabel("Frequency")
plt.ylabel("Smoothed Peaks")
plt.show()

#####################
### Ploting Knots ###
#####################

# 1. Compute knots
k = knotLoc(vec=r, k=100, degree=3)
maxFreq = np.max(pdgrm.freqs)  # Maximum frequency

# 2. Plot LVK periodogram
plt.figure(figsize=(10, 4))
plt.plot(pdgrm.freqs, np.log(pdgrm.power), linestyle="-", color="lightgray")
plt.title("LVK Periodogram")
plt.xlabel("Frequency")
plt.ylabel("Log-periodogram")

# Red points: knot locations
plt.scatter(k * maxFreq, np.full_like(k, -35), color="red", label="Knots")

# 3. Distribution function from smoothed peaks
N = len(r)
dens = r / np.sum(r)
cumf = np.cumsum(dens)

# Interpolation function (distribution function)
df = interp1d(
    np.linspace(0, 1, N),
    cumf,
    kind="linear",
    bounds_error=False,
    fill_value=(0, 1),
)

# 4. Plot areas to allocate knots
plt.figure(figsize=(10, 4))
plt.plot(
    pdgrm.freqs,
    r / np.max(r),
    linestyle="-",
    color="lightgray",
    label="Normalized Smoothed Peaks",
)

# Add cumulative distribution (red line)
plt.plot(
    pdgrm.freqs,
    df(np.array(pdgrm.freqs) / 2048),
    color="red",
    label="CDF",
)

plt.title("Smoothed peaks & CDF")
plt.xlabel("Frequency")
plt.ylabel("Normalized Peaks / CDF")
plt.legend()
plt.show()
../../_images/61f159814153bd1ea4eac13ec7d36ee8c4c609c7b14e9e9af132f9f88ca125ac.png ../../_images/65999fbf4fedd018f03d83bb2ada0b1559a85f24398971dfdaa71bcb797f7203.png ../../_images/0406c8b66db640918d041c5f2c556b50c340d5fa3b11520167798e1d7801f1b4.png ../../_images/1edca4bc4161c62c5342c2a039c822334e1ced2485409608eddcccfc86686852.png
from log_psplines.mcmc import run_mcmc

# Run MCMC
mcmc_results = run_mcmc(
    pdgrm,
    n_warmup=1000,
    n_samples=2000,
    knot_kwargs=dict(knots=k * maxFreq, degree=3),
    verbose=True,
)
NUTS sampler [cpu] [ 0 42]
sample: 100%|██████████| 3000/3000 [00:56<00:00, 52.80it/s, 15 steps of size 3.58e-01. acc. prob=0.90] 
Sampling completed in 58.09 seconds
  ESS min: 2008.0, max: 4957.8
  Runtime: 58.09 sec
from log_psplines.plotting import plot_pdgrm
from IPython.display import display, HTML

fig = plot_pdgrm(idata=mcmc_results, interactive=True)

display(HTML(fig.to_html(include_plotlyjs="cdn")))
# This is the current saved LVK knot allocation method in logPsplinePSD
mcmc_results_v2 = run_mcmc(
    pdgrm,
    n_warmup=1000,
    n_samples=2000,
    knot_kwargs=dict(degree=3, method="lvk"),
    verbose=True,
)
Found 220 spectral line regions using threshold = 8.36
Adaptive binning (d=25): 9 peak regions, 10 zero regions
Generated 119 adaptive knots (15.0-1024.0 Hz)
NUTS sampler [cpu] [ 0 42]
sample: 100%|██████████| 3000/3000 [02:01<00:00, 24.62it/s, 15 steps of size 3.44e-01. acc. prob=0.88] 
Sampling completed in 122.49 seconds
  ESS min: 1285.8, max: 4302.8
  Runtime: 122.49 sec
from log_psplines.plotting import plot_pdgrm
from IPython.display import display, HTML

fig = plot_pdgrm(idata=mcmc_results_v2, interactive=True)
display(HTML(fig.to_html(include_plotlyjs="cdn")))

GW1500914#

from log_psplines.example_datasets.lvk_data import LVKData

data = LVKData.from_event("GW150914")
data.plot_psd()
pdgrm = Periodogram(data.freqs, data.psd)
pdgrm = pdgrm.cut(20, 1000)
Downloading H1 data [1126259454.4 - 1126259458.4]
../../_images/fc3523f102a18e2195b17bf93e5ddea5aa7cf02bc49ddc5677a8e9c1f0b117eb.png
r, k = peak_and_knot_loc(pdgrm)
gw150914_psd_patricio = run_mcmc(
    pdgrm,
    n_warmup=1000,
    n_samples=2000,
    knot_kwargs=dict(degree=3, knots=k),
    verbose=True,
    sampler="mh",
)
Metropolis-Hastings with adaptive step sizes [cpu] [1536900715  689864229]
Sampling completed in 12.87 seconds
Final acceptance rate: 0.452 (target: 0.44)
  ESS min: 2.6, max: 1295.5
  Runtime: 12.87 sec
fig = plot_pdgrm(idata=gw150914_psd_patricio, interactive=True)
display(HTML(fig.to_html(include_plotlyjs="cdn")))
gw150914_psd = run_mcmc(
    pdgrm,
    n_warmup=1000,
    n_samples=2000,
    knot_kwargs=dict(degree=3, method="lvk"),
    verbose=True,
    sampler="mh",
)
Found 64 spectral line regions using threshold = 8.22
Adaptive binning (d=25): 17 peak regions, 17 zero regions
Generated 206 adaptive knots (20.0-1000.0 Hz)
Metropolis-Hastings with adaptive step sizes [cpu] [1536900715  689864229]
Sampling completed in 42.88 seconds
Final acceptance rate: 0.425 (target: 0.44)
  ESS min: 2.4, max: 362.7
  Runtime: 42.88 sec
fig = plot_pdgrm(idata=gw150914_psd, interactive=True)
display(HTML(fig.to_html(include_plotlyjs="cdn")))